异常值处理
The following article is from 风控建模 Author Monica
本段代码用于异常值处理(缩尾处理):凡是大于99分位数的取值都置为99分位数,小于1分位数的取值都置为1分位数。
代码如下。
其中,宏 %yq_ycz 的参数含义为:
lib =逻辑库;
out=输出的数据集;
open_data=包含99分位数和1分位数的数据集;
input_data=要处理异常值的数据集。
/* Copy model_data5 into model_data6, leaving out the variables named in &py_var. */
data model_data6 (drop = &py_var);
    set model_data5;
run;
/* Outlier handling: values above the 99th percentile are set to the 99th
   percentile, values below the 1st percentile are set to the 1st percentile. */
/* Write the column metadata of model_data6 (name, type, label) to model_var6.
   NOPRINT and OUT= are two separate options; they had been fused into one token. */
proc contents data = model_data6 noprint out = model_var6(keep = name type label);
run;
/* Build &num_var: a blank-separated list of the numeric columns (type = 1 in
   the PROC CONTENTS output) that should be winsorized, excluding the columns
   named in the NOT IN list. */
/* Reset first so that a value left over from an earlier run is not reused
   when the query returns no rows. */
%let num_var = ;
proc sql noprint;
    select name
        into :num_var separated by ' '
        from model_var6
        /* SAS variable names are case-insensitive, so compare in lower case. */
        where lower(name) not in ("default","account_place_p","education","house_product_type","insurance_money_type","position","submit_time1","submit_time2","unit_type")
          and type = 1;
quit;
%put &num_var.;
/* Compute the 1st and 99th percentiles. */
/* Suppress printed output and capture two PROC UNIVARIATE ODS tables as
   datasets: ExtremeObs -> num_extremobs2, Quantiles -> num_quantiles. */
ods listing close;
ods results off;
ods output
    extremeobs = num_extremobs2
    quantiles  = num_quantiles;
proc univariate data = model_data6;
    var &num_var.;
run;
/* Stop capturing and restore the normal output destinations. */
ods output close;
ods results on;
ods listing;
/* Split the captured quantile table: rows whose quantile text contains "1%"
   go to num_quantiles_1, rows whose quantile text contains "99%" go to
   num_quantiles_99. All other rows are discarded. */
data num_quantiles_99 num_quantiles_1;
    set num_quantiles;
    if index(quantile, "1%") > 0 then output num_quantiles_1;
    if index(quantile, "99%") > 0 then output num_quantiles_99;
run;
/* Add a suffix to the numeric variables: for type = 1 rows, name1 is the
   original name followed by "_2". name1 stays blank for other rows. */
data model_var7;
    set model_var6;
    if type = 1 then name1 = compress(name) || "_" || "2";
run;
/* Attach the percentile estimates to every numeric variable.
   tmp_num_991 holds name, name1 and the 99th-percentile estimate;
   tmp_num_11 holds name, name1 and the 1st-percentile estimate.
   Rows without an estimate are removed by the WHERE clause. */
proc sql;
    create table tmp_num_991 as
        select a.name, name1, b.estimate
        from model_var7 a left join num_quantiles_99 b
            on a.name = b.varname
        where type = 1 and Estimate ^= .
    ;
    create table tmp_num_11 as
        select a.name, name1, b.estimate
        from model_var7 a left join num_quantiles_1 b
            on a.name = b.varname
        where type = 1 and Estimate ^= .
    ;
quit;
/* Sort both percentile tables by variable name so they can be match-merged. */
proc sort data = tmp_num_11;
    by name;
run;
proc sort data = tmp_num_991;
    by name;
run;

/* Combine the two tables into one row per variable:
   name, name1, Estimate1 (1st percentile), Estimate99 (99th percentile). */
data tmp_num;
    merge tmp_num_11 (rename = (Estimate = Estimate1))
          tmp_num_991(rename = (Estimate = Estimate99));
    /* Match on name. Without BY the rows are paired by position, which mixes
       up the bounds as soon as the two tables differ in their rows. */
    by name;
    attrib _all_ label = '';
    /* Keep only variables that have both bounds. */
    if nmiss(Estimate1, Estimate99) = 0;
run;
/*
 * yq_ycz: cap every listed variable at its 1st / 99th percentile.
 *
 * Parameters
 *   lib        - library that holds input_data (also used when deleting an
 *                existing copy of out)
 *   out        - name of the output dataset
 *   open_data  - dataset with one row per variable and the columns
 *                name (source variable), name1 (name of the capped variable),
 *                estimate1 (lower bound) and Estimate99 (upper bound)
 *   input_data - dataset in lib whose variables are capped
 *
 * For each row of open_data a one-column dataset out_<i> is built; the
 * columns are then merged side by side into out.
 *
 * NOTE(review): out and the out_<i> work tables are written with a one-level
 * name, while the existence check and delete use lib - confirm that is intended
 * when lib is not WORK.
 * NOTE(review): a missing input value compares lower than any number, so it is
 * replaced by the lower bound - confirm that this is the desired treatment.
 */
%macro yq_ycz(lib =, out =, open_data =, input_data =);
    /* Keep the working variables out of the caller's symbol tables. */
    %local dsid nobs i rc varnume1 varnume2 varnume3 varnume4
           value1 value2 value3 value4;

    /* Remove an earlier copy of the output dataset. */
    %if %sysfunc(exist(&lib..&out.)) ne 0 %then %do;
        proc datasets lib = &lib. nolist;
            delete &out.;
        quit;
    %end;

    %let dsid = %sysfunc(open(&open_data.));
    %if &dsid gt 0 %then %do;
        %let nobs = %sysfunc(attrn(&dsid, nobs));

        /* Column positions do not change between rows, so look them up once. */
        %let varnume1 = %sysfunc(varnum(&dsid, name));
        %let varnume2 = %sysfunc(varnum(&dsid, name1));
        %let varnume3 = %sysfunc(varnum(&dsid, estimate1));
        %let varnume4 = %sysfunc(varnum(&dsid, Estimate99));

        %do i = 1 %to &nobs;
            %let rc = %sysfunc(fetchobs(&dsid, &i));
            %let value1 = %sysfunc(getvarc(&dsid, &varnume1));
            %let value2 = %sysfunc(getvarc(&dsid, &varnume2));
            /* BEST32. keeps the full precision of the bounds; the default
               conversion of %sysfunc uses BEST12. */
            %let value3 = %sysfunc(getvarn(&dsid, &varnume3), best32.); /* 1st percentile */
            %let value4 = %sysfunc(getvarn(&dsid, &varnume4), best32.); /* 99th percentile */

            /* Build the capped copy of the current variable. */
            data out_&i.;
                set &lib..&input_data.(keep = &value1.);
                if &value1. < &value3. then &value2. = &value3.;
                else if &value1. > &value4. then &value2. = &value4.;
                else &value2. = &value1.;
                keep &value2.;
                label &value2. = "&value2.";
            run;

            /* The first column starts the output; later columns are appended
               side by side (row i of every out_<n> comes from the same input row). */
            %if &i. = 1 %then %do;
                data &out.;
                    set out_&i.;
                    keep &value2.;
                run;
            %end;
            %else %do;
                data &out.;
                    merge &out. out_&i.;
                run;
            %end;
        %end;

        %let rc = %sysfunc(close(&dsid));
    %end;
    %else %do;
        %put ERROR: yq_ycz could not open &open_data.. %sysfunc(sysmsg());
    %end;
%mend;
/* Cap the variables of work.model_data6 using the bounds in tmp_num; the
   result is written to outdata_num. */
%yq_ycz(lib = work, out = outdata_num, open_data = tmp_num, input_data = model_data6);

/* Remove the per-variable work tables out_1, out_2, ... created by the macro. */
proc datasets lib = work nolist;
    delete out_: / memtype = data;
quit;
来源|风控建模
作者|Monica
更多精彩,戳这里: